# -*- coding: utf-8 -*-
"""
@Auth:
@Time: 2022-11-06 9:53
"""
# Preprocessing applied to the data before it is fed into the model
import pandas as pd

import common


def to_date_type(year, month):
    """
    Format a year and a month as a ``YYYY-MM`` label with a zero-padded month.

    Both values are converted with ``int()`` before formatting, so integral
    floats (for example ``2022.0`` and ``3.0``) produce the same label as the
    corresponding integers instead of leaking a ``.0`` suffix into the text.

    :param year: year number (int or integral float)
    :param month: month number (int or integral float)
    :return: label such as ``"2022-03"`` or ``"2022-11"``
    :raises ValueError: if a value cannot be converted to ``int`` (e.g. NaN)
    """
    # Normalise first: string concatenation of a float would give "2022.0-03.0".
    return "{}-{:02d}".format(int(year), int(month))

def divide_by_month(data_frame: pd.DataFrame):
    """
    Split the data set by calendar month, build one feature row per month,
    and persist the resulting table as a CSV file.

    The "事故日期" (accident date) column of ``data_frame`` is overwritten in
    place with its parsed datetime form; values that cannot be parsed become
    NaT because of ``errors='coerce'``.

    Each monthly row holds:
      * one entry per value of "事故发生城市名称" (accident city) with the
        number of records for that city in the month,
      * "时间": the month label produced by ``to_date_type``,
      * "受伤人数" / "死亡人数": sums of the same-named input columns,
      * "总理赔金额": sum of the "理赔金额" column,
      * "总出险数量": number of records in the month.

    Missing values in the combined table (e.g. a city absent from a month)
    are filled with 0, and the table is written to ``common.TRAIN_DATA_PATH``
    as a GBK-encoded CSV without the index column.

    :param data_frame: records containing the columns named above
    :return: None
    """
    date_column = "事故日期"
    data_frame[date_column] = pd.to_datetime(data_frame[date_column], errors='coerce')
    accident_dates = data_frame[date_column]
    monthly_groups = data_frame.groupby([accident_dates.dt.year, accident_dates.dt.month])

    monthly_rows = []
    for (year, month), records in monthly_groups:
        # Start from the per-city record counts, then append the aggregates.
        row = records["事故发生城市名称"].value_counts()
        row["时间"] = to_date_type(year, month)
        row["受伤人数"] = records["受伤人数"].sum()
        row["死亡人数"] = records["死亡人数"].sum()
        row["总理赔金额"] = records["理赔金额"].sum()
        row["总出险数量"] = len(records)
        monthly_rows.append(row)

    feature_frame = pd.DataFrame(monthly_rows)
    feature_frame.fillna(0, inplace=True)
    feature_frame.to_csv(common.TRAIN_DATA_PATH, index=False, encoding="gbk")


if __name__ == '__main__':
    # Load the CSV at common.PRE_HANDLED_DATA_PATH and write the per-month features.
    pre_handled = pd.read_csv(common.PRE_HANDLED_DATA_PATH)
    divide_by_month(pre_handled)
